rm(list=ls(all=TRUE))

### data file created by running script_01_relicate_original_results.R
load("Data/Temp Files/BIW_replicated_datasets.RData")


feols(deviate ~ rank_p_overall  | icpsr, data = subset(da , congress < 113 &!cast_code%in%c(2,3,4,5,9) & r_party!="other")) %>%
  summary(. , cluster = "congress")
feols(deviate ~ rank_p  | icpsr, data = subset(da2 , congress < 113 & !cast_code%in%c(2,3,4,5,9) & r_party!="other")) %>%
  summary(. , cluster = "congress")
### just run this to check everything is correct



#####################################################################
#############    Randomization inference - table 7,8   ##############
#####################################################################
### results from these simulations also used in Table 5 and associated figures
nsims <- 10000
output4 <- output4rc <- data.frame(matrix(NA , nrow = nsims , ncol = 24))
### first 8 columns close votes; next 8 lopsided; final 8 all votes

### add close indicator to dataset
da <- da %>% as.data.table %>%
  mutate(. , yes_perc = tot_yes / tot_voters) %>%
  #mutate(. , close = ifelse(yes_perc >= 0.35 & yes_perc <= 0.65, 1 , 0))
  mutate(. , close = r_close)
### USE BIW definition of close, even with errors, for comparability
### Note can only look through 112th congress, because supermajority data not available afterwards


da_temp1 <- subset(da , congress < 113)
names.unique <- unique(da_temp1$bioname)
### subset here, rather than in loop, not to slow down code

set.seed(12345)
for(i in 1:nsims){
  temp <- data.frame(bioname = names.unique,
                     name2 = sample(names.unique , length(names.unique) , replace = FALSE)) %>% as.data.table()
  da_sim <- join(da_temp1 , temp , by = "bioname") 
  
  for(k in 1:2){
    if(k==1){
      da_sim <- da_sim[order(da_sim$roll_call , da_sim$name2),]
      da_sim2 <- da_sim[, `:=` (rank_p_rc = (seq_along(name2)-1)/(length(name2)-1)) , by = roll_call ]
      #)
    }
    if(k==2){
      da_sim2 <- subset(da_sim , !cast_code%in%c(9))
      da_sim2 <- da_sim2[order(da_sim2$roll_call , da_sim2$name2),]
      da_sim2 <- da_sim2[, `:=` (rank_p_rc = (seq_along(name2)-1)/(length(name2)-1)) , by = roll_call ]
    }
    
    #much smaller standard errors
    ttttemp <- matrix(NA , nrow = 1 , ncol = 24)
    for(j in 1:3){
      tttemp <- matrix(NA , nrow = 1 , ncol = 8)
      if(j==1) da_sim3 <- subset(da_sim2 , close==1 & !cast_code%in%c(2,3,4,5,9) & r_party!="other" & !is.na(deviate))
      if(j==2) da_sim3 <- subset(da_sim2 , close==0 & !cast_code%in%c(2,3,4,5,9) & r_party!="other" & !is.na(deviate))
      if(j==3) da_sim3 <- subset(da_sim2 , !cast_code%in%c(2,3,4,5,9) & r_party!="other" & !is.na(deviate))
      
      ttemp <- feols(deviate ~ rank_p_rc  | interaction(state_abbrev,party_code,congress), data = da_sim3)
      ttemp1 <- summary(ttemp , cluster = "congress") %>% coeftable(.)
      tttemp[1,1:4] <- unlist(ttemp1)
      ttemp <- feols(deviate ~ rank_p_rc  | interaction(state_abbrev,party_code,roll_call), data = da_sim3)
      ttemp1 <- summary(ttemp , cluster = "congress") %>% coeftable(.)
      tttemp[1,5:8] <- unlist(ttemp1)
      if(j==1) ttttemp[1,1:8] <- tttemp
      if(j==2) ttttemp[1,9:16] <- tttemp
      if(j==3) ttttemp[1,17:24] <- tttemp
    }
    
    if(k==1) output4[i,] <- ttttemp
    if(k==2) output4rc[i,] <- ttttemp
  }
  cat("\r", i, "of", nsims) ### How far loop has progressed
}


save(output4 , output4rc,
     file = "Data/Temp Files/state_delegation_design_RI_sims.RData")

################################################################################
####################### LOAD DATA AND START FROM HERE ##########################
################################################################################
load(file = "Data/Temp Files/state_delegation_design_RI_sims.RData")

table4 <- matrix(NA , nrow = 2 , ncol = 4)

estimate <- feols(deviate ~ rank_p  | interaction(state_abbrev,party_code,congress), 
                  data = subset(da2 , congress < 113 & !cast_code%in%c(2,3,4,5,9) & r_party!="other")) %>%
  summary(. , cluster = "congress") %>% coeftable()
sd(output4rc$X17)
mean(output4rc$X17 <= -0.172)

table4[1,1] <- estimate[1,1]
table4[2,1] <- mean(abs(output4rc$X17) >= abs(estimate[1,1]))


estimate <- feols(deviate ~ rank_p  | interaction(state_abbrev,party_code,roll_call), 
                  data = subset(da2 , congress < 113 & !cast_code%in%c(2,3,4,5,9) & r_party!="other")) %>%
  summary(. , cluster = "congress") %>% coeftable()
sd(output4rc$X21)
mean(output4rc$X21 <= estimate[1,1])
mean(abs(output4rc$X21) >= abs(estimate[1,1]))
mean(output4rc$X21 <= -.114)
table4[1,2] <- estimate[1,1]
table4[2,2] <- mean(abs(output4rc$X17) >= abs(estimate[1,1]))



estimate <- feols(deviate ~ rank_p_overall  | interaction(state_abbrev,party_code,congress), 
                  data = subset(da2 , congress < 113 & !cast_code%in%c(2,3,4,5,9) & r_party!="other")) %>%
  summary(. , cluster = "congress") %>% coeftable()
sd(output4$X17)
mean(output4$X17 <= estimate[1,1])
mean(output4$X17 <= -.221)
table4[1,3] <- estimate[1,1]
table4[2,3] <- mean(abs(output4$X17) >= abs(estimate[1,1]))


estimate <- feols(deviate ~ rank_p_overall  | interaction(state_abbrev,party_code,roll_call), 
                  data = subset(da2 , congress < 113 & !cast_code%in%c(2,3,4,5,9) & r_party!="other")) %>%
  summary(. , cluster = "congress") %>% coeftable()
sd(output4$X21)
mean(output4$X21 <= estimate[1,1])
mean(output4$X21 <= -.511)
table4[1,4] <- estimate[1,1]
table4[2,4] <- mean(abs(output4$X21) >= abs(estimate[1,1]))


sink("Generated Files/Replicated Table 4.txt")
table4 %>% round(. , 3)
sink(file = NULL)

### close votes (add indicator to da2)
da2 <- subset(da , !cast_code%in%c(9)) %>% as.data.table %>%
  mutate(. , yes_perc = tot_yes / tot_voters) %>%
  #mutate(. , close = ifelse(yes_perc >= 0.35 & yes_perc <= 0.65, 1 , 0))
  mutate(. , close = r_close)
da2 <- da2[order(da2$roll_call , da2$bioname),]
da2 <- da2[, `:=` (rank_p = (seq_along(bioname)-1)/(length(bioname)-1)) , by = roll_call ]

table(ca2$close,useNA='always')

table8 <- matrix(NA , nrow = 4 , ncol = 4)

estimate <- feols(deviate ~ rank_p  | interaction(state_abbrev,party_code,congress), 
                  data = subset(da2 , congress < 113 & close==1 & !cast_code%in%c(2,3,4,5,9) & r_party!="other")) %>%
  summary(. , cluster = "congress") %>% coeftable()
sd(output4rc$X1)
mean(output4rc$X1 <= estimate[1,1])
table8[1,1] <- estimate[1,1]
table8[2,1] <- mean(abs(output4rc$X1) >= abs(estimate[1,1]))


estimate <- feols(deviate ~ rank_p  | interaction(state_abbrev,party_code,roll_call), 
                  data = subset(da2 , congress < 113 & close==1 & !cast_code%in%c(2,3,4,5,9) & r_party!="other")) %>%
  summary(. , cluster = "congress") %>% coeftable()
sd(output4rc$X5)
mean(output4rc$X5 <= estimate[1,1])
table8[1,2] <- estimate[1,1]
table8[2,2] <- mean(abs(output4rc$X5) >= abs(estimate[1,1]))


estimate <- feols(deviate ~ rank_p_overall  | interaction(state_abbrev,party_code,congress), 
                  data = subset(da2 , congress < 113 & close==1 & !cast_code%in%c(2,3,4,5,9) & r_party!="other")) %>%
  summary(. , cluster = "congress") %>% coeftable()
sd(output4$X1)
mean(output4$X1 <= estimate[1,1])
table8[1,3] <- estimate[1,1]
table8[2,3] <- mean(abs(output4$X1) >= abs(estimate[1,1]))


estimate <- feols(deviate ~ rank_p_overall  | interaction(state_abbrev,party_code,roll_call), 
                  data = subset(da2 , congress < 113 & close==1 & !cast_code%in%c(2,3,4,5,9) & r_party!="other")) %>%
  summary(. , cluster = "congress") %>% coeftable()
sd(output4$X5)
mean(output4$X5 <= estimate[1,1])
table8[1,4] <- estimate[1,1]
table8[2,4] <- mean(abs(output4$X5) >= abs(estimate[1,1]))


### lopsided votes

estimate <- feols(deviate ~ rank_p  | interaction(state_abbrev,party_code,congress), 
                  data = subset(da2 , congress < 113 & close==0 & !cast_code%in%c(2,3,4,5,9) & r_party!="other")) %>%
  summary(. , cluster = "congress") %>% coeftable()
sd(output4rc$X9)
mean(output4rc$X9 <= estimate[1,1])
table8[3,1] <- estimate[1,1]
table8[4,1] <- mean(abs(output4rc$X9) >= abs(estimate[1,1]))


estimate <- feols(deviate ~ rank_p  | interaction(state_abbrev,party_code,roll_call), 
                  data = subset(da2 , congress < 113 & close==0 & !cast_code%in%c(2,3,4,5,9) & r_party!="other")) %>%
  summary(. , cluster = "congress") %>% coeftable()
sd(output4rc$X13)
mean(output4rc$X13 <= estimate[1,1])
table8[3,2] <- estimate[1,1]
table8[4,2] <- mean(abs(output4rc$X13) >= abs(estimate[1,1]))


estimate <- feols(deviate ~ rank_p_overall  | interaction(state_abbrev,party_code,congress), 
                  data = subset(da2 , congress < 113 & close==0 & !cast_code%in%c(2,3,4,5,9) & r_party!="other")) %>%
  summary(. , cluster = "congress") %>% coeftable()
sd(output4$X9)
mean(output4$X9 <= estimate[1,1])
table8[3,3] <- estimate[1,1]
table8[4,3] <- mean(abs(output4$X9) >= abs(estimate[1,1]))


estimate <- feols(deviate ~ rank_p_overall  | interaction(state_abbrev,party_code,roll_call), 
                  data = subset(da2 , congress < 113 & close==0 & !cast_code%in%c(2,3,4,5,9) & r_party!="other")) %>%
  summary(. , cluster = "congress") %>% coeftable()
sd(output4$X13)
mean(output4$X13 <= estimate[1,1])
table8[3,4] <- estimate[1,1]
table8[4,4] <- mean(abs(output4$X13) >= abs(estimate[1,1]))


sink("Generated Files/Replicated Table 8.txt")
table8 %>% round(. , 3)
sink(file = NULL)

### table 8 combined with Table 4 during revision process


################################################################################
###### look at differences in ideology among house members
################################################################################

daH <- fread("Data/Hall_votes.csv" , stringsAsFactors = FALSE)
daH <- subset(daH , congress > 34 & congress < 118 & chamber=="House")
daH$roll_call <- paste0(daH$congress , "_", daH$rollnumber)

daH_temp <- read.csv("Data/Hall_members.csv" , stringsAsFactors = FALSE)
head(daH_temp)

daH <- join(daH , subset(daH_temp , select = c("congress","icpsr","state_abbrev","party_code","bioname")) , by = c("congress","icpsr"))
daH <- subset(daH , state_abbrev!="USA") %>%
  mutate(. , bioname = toupper(bioname))

### cast codes are 1 (yea; 2 paired 3 announced) 6 (nay; 4 announced 5 paired) 7/8 present and 9 abstain
### i'm trying to match exactly the procedure used in the original for constructing deviation
daH$yea <- NA
daH$yea[daH$cast_code%in%c(1,2,3)] <- 1
daH$yea[daH$cast_code%in%c(4,5,6)] <- 0
### original paper considers 1,2,3 as yeas
daH$not_voting <- as.numeric(daH$cast_code==9)

daH$r_party <- "other"
daH$r_party[daH$party_code==100] <- "Democrat"
daH$r_party[daH$party_code==200] <- "Republican"
table(daH$party_code)

daH <- as.data.table(daH)
daH <- daH[, `:=` (dem_yes = sum(na.omit(yea[r_party=="Democrat"])),
                   dem_voters = length(na.omit(yea[r_party=="Democrat"])),
                   rep_yes = sum(na.omit(yea[r_party=="Republican"])),
                   rep_voters = length(na.omit(yea[r_party=="Republican"]))
) , by = roll_call ]

daH$party_vote <- 0
daH$party_vote[daH$r_party=="Democrat" & (daH$dem_yes - daH$yea) > (daH$dem_voters - daH$dem_yes - (1- daH$yea))] <- 1
daH$party_vote[daH$r_party=="Republican" & (daH$rep_yes - daH$yea) > (daH$rep_voters - daH$rep_yes - (1- daH$yea))] <- 1
daH$party_vote[daH$r_party=="Democrat" & (daH$dem_yes - daH$yea) == (daH$dem_voters - daH$dem_yes - (1- daH$yea))] <- NA
daH$party_vote[daH$r_party=="Republican" & (daH$rep_yes - daH$yea) == (daH$rep_voters - daH$rep_yes - (1- daH$yea))] <- NA


daH$deviate <- as.numeric(daH$yea!=daH$party_vote)
daH$deviate[is.na(daH$yea)] <- NA
daH$deviate[daH$cast_code%in%c(7,8)] <- 1

daH <- daH[order(daH$roll_call , daH$bioname),]
daH <- daH[, `:=` (rank_p_overall = (seq_along(bioname)-1)/(length(bioname)-1)) , by = roll_call ]

daH2 <- subset(daH , !cast_code%in%c(9)) %>% as.data.table
daH2 <- daH2[order(daH2$roll_call , daH2$bioname),]
daH2 <- daH2[, `:=` (rank_p = (seq_along(bioname)-1)/(length(bioname)-1)) , by = roll_call ]

feols(deviate ~ rank_p  | icpsr, data = subset(daH2 , !cast_code%in%c(2,3,4,5,9) & r_party!="other" & congress>=93)) %>%
  summary(. , cluster = "congress")
### just run this to check everything is correct- small, insignificant negative effect

table9 <- matrix(NA , nrow = 2 , ncol = 4)

estimate <- feols(deviate ~ rank_p  | interaction(state_abbrev,party_code,congress), data = subset(daH2 , !cast_code%in%c(2,3,4,5,9) & r_party!="other" & !is.na(deviate) & congress>=93)) %>%
  summary(. , cluster = "congress") %>% coeftable()
table9[1:2,1] <- estimate[1:2]

  estimate <- feols(deviate ~ rank_p  | interaction(state_abbrev,party_code,roll_call), data = subset(daH2 , !cast_code%in%c(2,3,4,5,9) & r_party!="other" & !is.na(deviate) & congress>=93)) %>%
  summary(. , cluster = "congress")%>% coeftable()
table9[1:2,2] <- estimate[1:2]
  

estimate <- feols(deviate ~ rank_p_overall  | interaction(state_abbrev,party_code,congress), data = subset(daH ,  !cast_code%in%c(2,3,4,5,9) & r_party!="other" & !is.na(deviate) & congress>=93)) %>%
  summary(. , cluster = "congress")%>% coeftable()
table9[1:2,3] <- estimate[1:2]

estimate <- feols(deviate ~ rank_p_overall  | interaction(state_abbrev,party_code,roll_call), data = subset(daH , !cast_code%in%c(2,3,4,5,9) & r_party!="other" & congress>=93)) %>%
  summary(. , cluster = "congress")%>% coeftable()
table9[1:2,4] <- estimate[1:2]

sink("Generated Files/Replicated Table 9.txt")
table9 %>% round(. , 4)
sink(file = NULL)
